Differential

Preamble

Dependencies

Code
library(edgeR)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scuttle)
library(pheatmap)
library(patchwork)
library(ggbeeswarm)
library(EnhancedVolcano)
library(SingleCellExperiment)

Loading

Code
sce <- readRDS(file.path("..", "outs", "01-sce.rds"))

Utils

Code
.volcano <- \(df, title, fdr = 0.05, lfc = 1) {
  EnhancedVolcano(df, 
    x = "logFC", y = "FDR",
    FCcutoff = lfc, pCutoff = fdr,
    pointSize = 1, raster = TRUE,
    title = title, subtitle = NULL,
    lab = df[["gene"]], labSize = 2, 
    drawConnectors = TRUE, widthConnectors = 0.5) +
  guides(col = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  theme_bw(9) + theme(
    aspect.ratio = 1,
    legend.title = element_blank(),
    panel.grid.minor = element_blank())
}

Analysis

Code
# exclude LN for DGE analysis
sub <- sce[, sce$TissueSub != "LN"]
df <- data.frame(colData(sub), t(logcounts(sub)), check.names = FALSE)
gg <- pivot_longer(df, any_of(rownames(sub)), names_to = "gene", values_to = "expr")
Code
# setup subtype groupings
names(tum) <- tum <- c("RCC", "LSCC")
names(kid) <- kid <- c("Kidney", "RCC")
names(lun) <- lun <- c("Alveoles", "LSCC")
names(tls) <- tls <- c("E_TLS", "SFL_TLS", "PFL_TLS", "Tcell_TLS")
# split data by tumor type & TLS
dat <- c(list(all = sub,
    RCC = sub[, sub$TumorType == "RCC"], 
    LSCC = sub[, sub$TumorType == "LSCC"]),
    lapply(tls, \(.) sub[, sub$TissueSub == .]))
Code
ref <- c(
    all = unname(kid[1]),
    RCC = unname(kid[1]), 
    LSCC = unname(lun[1]), 
    sapply(tls, \(.) "LSCC"))
names(ids) <- ids <- names(dat)
dat <- dat[ids]; ref <- ref[ids]
fit <- lapply(ids, \(.) {
    # setup design matrix
    df <- data.frame(colData(dat[[.]]))
    if (. %in% tls) {
        tt <- factor(df$TumorType)
        df$TumorType <- relevel(tt, ref[.])
        mm <- model.matrix(~0+TumorType, df)
        colnames(mm) <- gsub("TumorType", "", colnames(mm))
    } else {
        st <- droplevels(df$TissueSub)
        df$TissueSub <- relevel(st, ref[.])
        mm <- model.matrix(~0+TissueSub, df)
        colnames(mm) <- gsub("TissueSub", "", colnames(mm))
    }
    # fit GLM model
    dgl <- DGEList(assay(dat[[.]]))
    dgl <- calcNormFactors(dgl)
    dgl <- estimateDisp(dgl, mm)
    fit <- glmQLFit(dgl, mm)
})
Code
# setup contrasts
cs <- c(list(
    # within tumor types
    RCC = c(
        # normal/tumor vs. any TLS
        list(TLS = list(kid, tls)), 
        # normal/tumor vs. specific TLS
        lapply(tls, \(t) list(kid, t)),
        # TLS maturation stages
        list(
            SFL = list("E_TLS", "SFL_TLS"),
            Tcell = list("E_TLS", "Tcell_TLS"))),
    LSCC = c(
        # normal/tumor vs. any TLS
        list(TLS = list(lun, tls)), 
        # normal/tumor vs. specific TLS
        lapply(tls, \(t) list(lun, t)),
        # TLS maturation stages
        list(
            SFL = list("E_TLS", "SFL_TLS"),
            Tcell = list("E_TLS", "Tcell_TLS")))),
    # kidney/RCC vs. lung/LSCC
    list(all = list(parenchyma = list(kid, lun))),
    # across tumor types, within TLS subsets
    lapply(tls, \(.) setNames(list("RCC"), .)))
cs <- lapply(ids, \(.) {
    x <- numeric(ncol(mm <- fit[[.]]$design))
    lapply(cs[[.]], \(c) {
        if (length(c) == 1) {
            i <- match(c, colnames(mm))
            x[i] <- 1; x[-i] <- -1
        } else {
            a <- match(c[[1]], colnames(mm))
            b <- match(c[[2]], colnames(mm))
            x[a] <- -1/sum(a != 0)
            x[b] <- 1/sum(b != 0)
        }
        return(x)
    })
})
Code
# run DGE analysis
res <- lapply(ids, \(.) {
    lapply(names(cs[[.]]), \(c) {
        ht <- glmQLFTest(fit[[.]], contrast = cs[[.]][[c]])
        tt <- topTags(ht, n = Inf)$table
        data.frame(row.names = NULL,
            gene = rownames(tt), tt,
            contrast = c, subset = .)
    }) |> do.call(what = rbind)
}) |> do.call(what = rbind)
rownames(res) <- NULL

Visualization

Volcano

Code
df <- res[res$subset == "all", ]
.volcano(df, title = "parenchyma", fdr = 0.05, lfc = 1) + 
    theme(legend.position = "top")

Code
ps <- lapply(tum, \(.) {
    df <- res[res$subset == . & res$contrast == "TLS", ]
    .volcano(df, title = ., fdr = 1e-4, lfc = 1.25)
})
wrap_plots(ps, nrow = 1) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Code
ps <- lapply(tls, \(.) {
    df <- res[res$subset == . & grepl("TLS", res$contrast), ]
    .volcano(df, title = ., fdr = 1e-3, lfc = 0.5)
})
wrap_plots(ps, nrow = 2) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Code
df <- res[res$contrast %in% c("SFL", "Tcell"), ]
ps <- lapply(unique(df$subset), \(s) {
    lapply(unique(df$contrast), \(c) {
        df <- res[res$subset == s & res$contrast == c, ]
        .volcano(df, fdr = 1e-4, lfc = 1.25, title = paste(s, c, sep = ","))
    })
})
wrap_plots(Reduce(c, ps), nrow = 2) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Heatmap

Code
top <- res %>%
    filter(FDR < 0.05) %>% 
    filter(subset == "all") %>%
    slice_max(abs(logFC), n = 80)
mtx <- logcounts(sub)[top$gene, sub$TissueSub %in% c(kid, lun)]
cd <- data.frame(colData(sub))[c("TissueSub")]
hm <- pheatmap(mtx, 
    main = "parenchyma", fontsize = 6,
    col = rev(hcl.colors(51, "RdBu")),
    scale = "row", show_colnames = FALSE, annotation_col = cd)

Code
top <- res %>%
    filter(!subset %in% tls) %>%
    group_by(subset) %>%
    filter(FDR < 0.1, contrast == "TLS") %>% 
    slice_max(abs(logFC), n = 40) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    mtx <- logcounts(sub)[top[[.]]$gene, sub$TumorType == .]
    cd <- data.frame(colData(sub))[c("TissueSub")]
    hm <- pheatmap(mtx, 
        main = ., fontsize = 6,
        col = rev(hcl.colors(51, "RdBu")),
        scale = "row", show_colnames = FALSE, annotation_col = cd)
    print(hm); cat("\n\n")
}

LSCC

RCC

Code
top <- res %>%
    group_by(subset) %>%
    filter(contrast %in% c("SFL", "Tcell")) %>%
    slice_max(abs(logFC), n = 40) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    idx <- sub$TumorType == . & sub$TissueSub %in% c(
        "E_TLS", paste0(unique(top[[.]]$contrast), "_TLS"))
    mtx <- logcounts(sub)[top[[.]]$gene, idx]
    cd <- data.frame(colData(sub))[c("TissueSub")]
    hm <- pheatmap(mtx, 
        main = ., fontsize = 6,
        col = rev(hcl.colors(51, "RdBu")),
        scale = "row", show_colnames = FALSE, annotation_col = cd)
    print(hm); cat("\n\n")
}

LSCC

RCC

Boxplot

Code
top <- res %>%
    filter(subset %in% tls) %>%
    group_by(subset) %>%
    slice_max(abs(logFC), n = 25) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    plt <- ggplot(
        filter(gg, 
            TissueSub == .,
            gene %in% top[[.]]$gene),
        aes(TumorType, expr, fill = TumorType)) +
        facet_wrap(~ gene, scales = "free_y") +
        geom_boxplot(size = 0.1, fill = NA, outlier.color = NA, show.legend = FALSE) + 
        geom_beeswarm(shape = 21, col = "black", stroke = 0.1, size = 1.2, alpha = 0.8) + 
        guides(fill = guide_legend(override.aes = list(size = 3, alpha = 1))) +
        labs(x = NULL, y = "Expression (logCPM)") +
        scale_fill_brewer(palette = "Set2") +
        theme_linedraw(9) + theme(
            panel.grid = element_blank(),
            axis.text.x = element_blank(),
            axis.ticks.x = element_blank(),
            strip.background = element_blank(),
            legend.key.size = unit(0.5, "lines"),
            strip.text = element_text(color = "black", face = "bold"))
    print(plt); cat("\n\n")
}

E_TLS

PFL_TLS

SFL_TLS

Tcell_TLS

Appendix

Code
saveRDS(res, file.path("..", "outs", "02-dge.rds"))
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=de_CH.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=de_CH.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=de_CH.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=de_CH.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Zurich
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] EnhancedVolcano_1.18.0      ggrepel_0.9.3              
 [3] ggbeeswarm_0.7.2            patchwork_1.1.3            
 [5] pheatmap_1.0.12             scuttle_1.10.2             
 [7] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
 [9] Biobase_2.60.0              GenomicRanges_1.52.0       
[11] GenomeInfoDb_1.36.3         IRanges_2.34.1             
[13] S4Vectors_0.38.1            BiocGenerics_0.46.0        
[15] MatrixGenerics_1.12.3       matrixStats_1.0.0          
[17] ggplot2_3.4.3               tidyr_1.3.0                
[19] dplyr_1.1.3                 edgeR_3.42.4               
[21] limma_3.56.2               

loaded via a namespace (and not attached):
 [1] beeswarm_0.4.0            gtable_0.3.4             
 [3] xfun_0.40                 htmlwidgets_1.6.2        
 [5] lattice_0.22-5            Cairo_1.6-1              
 [7] vctrs_0.6.3               tools_4.3.2              
 [9] bitops_1.0-7              generics_0.1.3           
[11] parallel_4.3.2            tibble_3.2.1             
[13] fansi_1.0.4               pkgconfig_2.0.3          
[15] Matrix_1.6-1              RColorBrewer_1.1-3       
[17] sparseMatrixStats_1.12.2  lifecycle_1.0.3          
[19] GenomeInfoDbData_1.2.10   farver_2.1.1             
[21] compiler_4.3.2            munsell_0.5.0            
[23] codetools_0.2-19          vipor_0.4.5              
[25] htmltools_0.5.6           RCurl_1.98-1.12          
[27] yaml_2.3.7                pillar_1.9.0             
[29] crayon_1.5.2              BiocParallel_1.34.2      
[31] DelayedArray_0.26.7       abind_1.4-5              
[33] tidyselect_1.2.0          locfit_1.5-9.8           
[35] digest_0.6.33             purrr_1.0.2              
[37] labeling_0.4.3            splines_4.3.2            
[39] fastmap_1.1.1             grid_4.3.2               
[41] colorspace_2.1-0          cli_3.6.1                
[43] magrittr_2.0.3            S4Arrays_1.0.6           
[45] utf8_1.2.3                withr_2.5.1              
[47] DelayedMatrixStats_1.22.6 scales_1.2.1             
[49] rmarkdown_2.24            XVector_0.40.0           
[51] beachmat_2.16.0           evaluate_0.21            
[53] ggrastr_1.0.2             knitr_1.44               
[55] rlang_1.1.1               Rcpp_1.0.11              
[57] glue_1.6.2                rstudioapi_0.15.0        
[59] jsonlite_1.8.7            R6_2.5.1                 
[61] zlibbioc_1.46.0